rm(list=ls())
library(tidyverse)

# TSE Codes USING NAMES ---------
# Locate the 2000 municipal-election file under data/raw/TSE. Exactly one
# match is expected; fail early with a clear message otherwise.
mun_elec_2000_file <- list.files(here::here("data", "raw", "TSE"),
                                 pattern = "mun_elec_2000",
                                 full.names = TRUE)
stopifnot(length(mun_elec_2000_file) == 1L)
mun_elec_2000 <- read_rds(mun_elec_2000_file)

# Lookup table from (sigla_uf, normalised municipality name) to the TSE code
# stored in sigla_ue. Names are normalised by removing accents, upper-casing,
# and dropping apostrophes, hyphens and spaces.
tse_codes_names <- mun_elec_2000 %>%
  distinct(sigla_uf, sigla_ue, nome_municipio) %>%
  mutate(tse_name = str_to_upper(stringi::stri_trans_general(nome_municipio, "latin-ascii")),
         tse_name = str_replace_all(tse_name, "'|-", " "),
         tse_name = str_replace_all(tse_name, " ", "")) %>%
  # Keep only rows whose sigla_ue contains digits, then store it as a number.
  filter(str_detect(sigla_ue, "[0-9]+")) %>%
  mutate(sigla_ue = as.numeric(sigla_ue)) %>%
  # Spelling variants of one municipality can normalise to the same tse_name;
  # keep a single row per key so joins on (tse_name, sigla_uf) do not
  # duplicate vote rows.
  distinct(sigla_uf, sigla_ue, tse_name, .keep_all = TRUE)

# TSE <-> IBGE municipality correspondence table.
# The file is ';'-delimited and read as latin1; the first line is skipped and
# the column names are supplied here. All columns are read as character.
tse_to_ibge <- read_delim(here::here("data", "raw", "correspondencia_municipios_tse_ibge.csv"),
                          skip = 1,
                          delim = ";",
                          col_names = c("uf", "tse_name", "tse_code", "mun_code", "mun_name"),
                          col_types = "ccccc",
                          # Argument spelled out in full (was relying on
                          # partial matching of `encodin`).
                          locale = locale(encoding = "latin1")) %>%
  mutate(
    # Drop the last character of the IBGE code.
    mun_code = str_sub(mun_code, 1, -2),
    tse_code = as.numeric(tse_code),
    mun_name = str_to_upper(stringi::stri_trans_general(mun_name, "latin-ascii")),
    # Same normalisation as the election-side names: no accents, upper case,
    # no apostrophes, hyphens or spaces.
    tse_name = str_to_upper(stringi::stri_trans_general(tse_name, "latin-ascii")),
    tse_name = str_replace_all(tse_name, "'|-", " "),
    tse_name = str_replace_all(tse_name, " ", "")
  )

# Read and save data 1996 -----

# First round data -------
# List the round-1 CSV files. The pattern is an anchored regex so that only
# names ending in ".csv" match (an unanchored ".csv" treats "." as a wildcard).
csv_files_1996_round1 <- list.files(
  here::here("data", "raw", "TSE", "old_TSE_repo", "mun_elec_1996", "round_1"),
  pattern = "\\.csv$",
  full.names = TRUE
)

# Read every file as text (all columns character), skipping the first 7 lines
# and supplying the column names, then stack the files and drop the trailing
# placeholder column.
elec_1996_data_round1 <- map(csv_files_1996_round1,
                             read_delim,
                             delim = ",",
                             locale = locale("pt", encoding = "latin1"),
                             skip = 7,
                             col_names = c("descricao_cargo",
                                           "sigla_uf",
                                           "nome_municipio",
                                           "sigla_partido",
                                           "numero_cand",
                                           "nome_candidato",
                                           "total_votos",
                                           "desc_sit_cand_tot",
                                           "vote_share",
                                           "composicao_coligacao",
                                           "column_to_be_excluded"),
                             col_types = cols(.default = "c")) %>%
  bind_rows() %>%
  select(-column_to_be_excluded)

# Strip grouping marks (commas and dots) from the vote counts and convert them
# to numbers. Done after reading because specifying the grouping mark at read
# time still produced warnings.
elec_1996_data_round1 <- mutate(
  elec_1996_data_round1,
  total_votos = total_votos %>%
    str_replace_all(",|\\.", "") %>%
    as.numeric()
)

# Show any rows with a missing vote count or vote share.
filter(elec_1996_data_round1, is.na(total_votos) | is.na(vote_share))

# Tag these rows as first-round results.
elec_1996_data_round1 <- mutate(elec_1996_data_round1, num_turno = 1)

# Second round data -------
# List the round-2 CSV files. The pattern is an anchored regex so that only
# names ending in ".csv" match (an unanchored ".csv" treats "." as a wildcard).
csv_files_1996_round2 <- list.files(
  here::here("data", "raw", "TSE", "old_TSE_repo", "mun_elec_1996", "round_2"),
  pattern = "\\.csv$",
  full.names = TRUE
)

# Read every file as text (all columns character), skipping the first 7 lines
# and supplying the column names, then stack the files and drop the trailing
# placeholder column.
elec_1996_data_round2 <- map(csv_files_1996_round2,
                             read_csv,
                             locale = locale("pt", encoding = "latin1"),
                             skip = 7,
                             col_names = c("descricao_cargo",
                                           "sigla_uf",
                                           "nome_municipio",
                                           "sigla_partido",
                                           "numero_cand",
                                           "nome_candidato",
                                           "total_votos",
                                           "desc_sit_cand_tot",
                                           "vote_share",
                                           "composicao_coligacao",
                                           "column_to_be_excluded"),
                             col_types = cols(.default = "c")) %>%
  bind_rows() %>%
  select(-column_to_be_excluded)

# Strip grouping marks (commas and dots) from the vote counts and convert them
# to numbers. Done after reading because specifying the grouping mark at read
# time still produced warnings.
elec_1996_data_round2 <- mutate(
  elec_1996_data_round2,
  total_votos = total_votos %>%
    str_replace_all(",|\\.", "") %>%
    as.numeric()
)

# Show any rows with a missing vote count or vote share.
filter(elec_1996_data_round2, is.na(total_votos) | is.na(vote_share))

# Tag these rows as second-round results.
elec_1996_data_round2 <- mutate(elec_1996_data_round2, num_turno = 2)

# Bind and adjust var format ------

# Stack both rounds, harmonise the text columns, and drop exact duplicate rows:
#  - coalition strings lose the spaces around "/"
#  - office and candidate status are upper-cased (status also loses accents)
#  - party acronyms lose spaces and are upper-cased
elec_1996_data <- bind_rows(elec_1996_data_round1, elec_1996_data_round2) %>%
  mutate(
    composicao_coligacao = str_replace_all(composicao_coligacao, " / ", "/"),
    descricao_cargo = str_to_upper(descricao_cargo),
    desc_sit_cand_tot = desc_sit_cand_tot %>%
      stringi::stri_trans_general("latin-ascii") %>%
      str_to_upper(),
    sigla_partido = str_to_upper(str_replace_all(sigla_partido, " ", ""))
  ) %>%
  distinct()

# Vote share variable has some cases with problems (like a negative sign).
# Recompute the share from total votes within each office / UF / municipality /
# round and count the mayoral (PREFEITO) races where the reported share is off
# by 0.1 percentage points or more.
# The check runs on a throw-away pipeline: the helper columns and the grouping
# never reach elec_1996_data. Cities are counted by (sigla_uf, nome_municipio)
# so the count does not depend on grouping.
# NOTE(review): earlier notes on this step recorded both 245 and 227 cities;
# re-run to confirm the current figure.
elec_1996_data %>%
  group_by(descricao_cargo, sigla_uf, nome_municipio, num_turno) %>%
  mutate(total_votes_mun = sum(total_votos),
         vote_share_computed = total_votos / total_votes_mun * 100,
         vote_share_check = abs(as.numeric(vote_share) - vote_share_computed)) %>%
  ungroup() %>%
  filter(descricao_cargo == "PREFEITO") %>%
  filter(!near(vote_share_check, 0, tol = 0.1)) %>%
  distinct(sigla_uf, nome_municipio) %>%
  nrow()

# Given that the original vote shares seem problematic, use total votes as the
# main variable and drop the reported share.
elec_1996_data <- elec_1996_data %>%
  select(-vote_share)

# Build the distinct (UF, municipality) pairs found in the 1996 data together
# with a normalised name key (no accents, upper case, no apostrophes, hyphens
# or spaces) to match against the TSE code lookup.
elec_1996_data_names <- elec_1996_data %>%
  ungroup() %>%
  distinct(sigla_uf, nome_municipio) %>%
  mutate(
    tse_name = nome_municipio %>%
      stringi::stri_trans_general("latin-ascii") %>%
      str_to_upper() %>%
      str_replace_all("'|-", " ") %>%
      str_replace_all(" ", "")
  )

# List the pairs with no match in the lookup.
# 22 cases not match. Some are just spelling
anti_join(elec_1996_data_names, tse_codes_names, by = c("tse_name", "sigla_uf"))

# Correct spelling to join tse codes.
# A single case_when() replaces the chain of ifelse() calls: rows where the
# condition evaluates to NA (e.g. missing UF) now keep their original name
# instead of being turned into NA.
elec_1996_data <- elec_1996_data %>%
  ungroup() %>%
  mutate(
    nome_municipio = case_when(
      nome_municipio == "CESARINA" & sigla_uf == "GO" ~ "CEZARINA",
      nome_municipio == "SAO LUIS DOS MONTES BELOS" & sigla_uf == "GO" ~ "SAO LUIS DE MONTES BELOS",
      nome_municipio == "TEREZINHA DE GOIAS" & sigla_uf == "GO" ~ "TERESINA DE GOIAS",
      nome_municipio == "VALPARAIZO" & sigla_uf == "GO" ~ "VALPARAISO",
      nome_municipio == "PIUM I" & sigla_uf == "MG" ~ "PIUMHI",
      nome_municipio == "SAO JOSE DO LAPA" & sigla_uf == "MG" ~ "SAO JOSE DA LAPA",
      nome_municipio == "BRODOSQUI" & sigla_uf == "SP" ~ "BRODOWSKI",
      nome_municipio == "MOJI DAS CRUZES" & sigla_uf == "SP" ~ "MOGI DAS CRUZES",
      TRUE ~ nome_municipio
    ),
    # Normalised join key: no accents, upper case, no apostrophes, hyphens or
    # spaces.
    tse_name = str_to_upper(stringi::stri_trans_general(nome_municipio, "latin-ascii")),
    tse_name = str_replace_all(tse_name, "'|-", " "),
    tse_name = str_replace_all(tse_name, " ", "")
  )

# Distinct (UF, normalised name) pairs currently present in the 1996 data.
elec_1996_data_names <- distinct(elec_1996_data, sigla_uf, tse_name)

# List the pairs that still have no match in the TSE code lookup.
# Still 14 cases to handle later
anti_join(elec_1996_data_names, tse_codes_names, by = c("tse_name", "sigla_uf"))

# Join TSE codes by normalised name and UF.
# na_matches = "never" keeps missing keys from matching each other.
elec_1996_data <- elec_1996_data %>%
  left_join(tse_codes_names,
            by = c("tse_name", "sigla_uf"),
            na_matches = "never")

# Check IBGE correspondence: list municipalities whose TSE code has no row in
# the TSE -> IBGE table (expected: only the cases left unmatched above).
# Rows with a missing sigla_ue are treated as unmatched rather than being
# paired with a correspondence row whose tse_code failed to parse.
elec_1996_data %>%
  anti_join(tse_to_ibge,
            by = c("sigla_ue" = "tse_code"),
            na_matches = "never") %>%
  distinct(nome_municipio.x, sigla_uf)

# Attach IBGE codes and names. Both tables carry tse_name, and the previous
# join already produced nome_municipio.x / .y, so the result holds the suffixed
# columns tse_name.x / tse_name.y as well.
elec_1996_data <- elec_1996_data %>%
  left_join(tse_to_ibge,
            by = c("sigla_ue" = "tse_code"),
            na_matches = "never")

# Municipalities still lacking an IBGE code (expected: the same unmatched
# cases as before).
elec_1996_data %>%
  filter(is.na(mun_code)) %>%
  distinct(nome_municipio.x)

# Correct the cases without mun code.
# Manual IBGE name/code assignments, keyed by the 1996 municipality name and
# UF. Keeping them in one table states each key once (instead of once per
# ifelse() call) and leaves rows with missing keys untouched.
# NOTE(review): "340201" for VINTE E SETE DA BOA VISTA (RS) starts with 34,
# whereas the other RS entry (ERVAL, "430710") starts with 43 - confirm this
# code against the IBGE table.
manual_ibge_fixes <- tribble(
  ~nome_municipio.x,            ~sigla_uf, ~mun_name_fix,               ~mun_code_fix,
  "ITAPACI",                    "GO",      "ITAPACI",                   "521090",
  "ALTAMIRA DO MARANHAO",       "MA",      "ALTAMIRA DO MARANHAO",      "210040",
  "BREJO DE AREIA",             "MA",      "BREJO DE AREIA",            "210215",
  "RETIRO",                     "PB",      "RETIRO",                    "250402",
  "TACIMA",                     "PB",      "TACIMA",                    "251640",
  "PARAU",                      "RN",      "PARAU",                     "240870",
  "VINTE E SETE DA BOA VISTA",  "RS",      "VINTE E SETE DA BOA VISTA", "340201",
  "OLIVEIRA DE FATIMA",         "TO",      "OLIVEIRA DE FATIMA",        "171550",
  "SAO BENTO DE POMBAL",        "PB",      "SAO BENTINHO",              "251392",
  "SAO DOMINGOS DE CABACEIRAS", "PB",      "SAO DOMINGOS DO CARIRI",    "251394",
  "ERVAL",                      "RS",      "HERVAL",                    "430710",
  "PETRONIO PORTELA",           "PI",      "NOVA SANTA RITA",           "220795"
)

# Where a manual fix exists it overrides the joined value; every other row
# keeps what the TSE -> IBGE join produced. The helper columns are removed
# again so the column set is unchanged.
elec_1996_data <- elec_1996_data %>%
  left_join(manual_ibge_fixes, by = c("nome_municipio.x", "sigla_uf")) %>%
  mutate(mun_name = coalesce(mun_name_fix, mun_name),
         mun_code = coalesce(mun_code_fix, mun_code)) %>%
  select(-mun_name_fix, -mun_code_fix)

# All cases solved: this should list no municipalities.
elec_1996_data %>%
  filter(is.na(mun_code)) %>%
  distinct(nome_municipio.x, sigla_uf)
         
# Select variables and save
# Add the election year, drop the join helper columns, and move the
# identifying columns (year, IBGE code, IBGE name, UF) to the front.
elec_1996_data <- elec_1996_data %>%
  mutate(ano_eleicao = 1996) %>%
  select(-c(nome_municipio.x, nome_municipio.y,
            tse_name.x, tse_name.y,
            sigla_ue, uf)) %>%
  relocate(ano_eleicao, mun_code, mun_name, sigla_uf)

# Save 1996 oldsys ------
write_rds(
  elec_1996_data,
  here::here("data","processed","elections","mun_elec_1996_oldsys.rds")
)

